
# Source: https://pymupdf.readthedocs.io/en/latest/pymupdf4llm/index.html

# Convert a PDF document to Markdown.
import pymupdf4llm


# Where the Markdown goes, and which PDF it is generated from.
output_md = 'output-pdf-llm.md'
input_pdf = "/Users/emery/Downloads/语料素材/1-概览/test-pdf2.pdf"

# Render the PDF as Markdown text.
md_text = pymupdf4llm.to_markdown(input_pdf)

# Save the Markdown as UTF-8 bytes; a binary write leaves line endings
# exactly as they appear in the converted text.
import pathlib
pathlib.Path(output_md).write_bytes(
    md_text.encode("utf-8")
)







#
